# Replication Code for Figure 2 in "Collaboration as a Tool for Equity? Reducing Racial and Ethnic Disparities in Healthcare Access"

# 1. Creating the dataset

library(readr)
library(dplyr)
library(stringr)

# Clean the key columns of a data frame that has `county` and `year` columns.
#
# `county` is processed in this order: surrounding whitespace is trimmed, a
# trailing ", Oregon" is removed, a trailing "County" (together with any
# whitespace in front of it) is removed, and leftover whitespace is squished.
# `year` is coerced to integer. Every other column is returned unchanged.
normalize_keys <- function(df) {
  mutate(
    df,
    # Each assignment sees the result of the previous one.
    county = str_trim(county),
    county = str_remove(county, ",\\s*Oregon$"),
    county = str_remove(county, "\\s*County$"),
    county = str_squish(county),
    year = as.integer(year)
  )
}

# Read the interpreter file and normalise its keys in one step.
interps <- normalize_keys(read_csv("Oregon_HCI.csv"))

library(readr)
library(dplyr)
library(stringr)

# Standardise the `county` column of a data frame so it can be used as a
# join key: trim surrounding whitespace, drop a trailing ", Oregon", drop a
# trailing "County" (and the whitespace before it), squish leftover
# whitespace, and convert to title case. All other columns are untouched.
normalize_county <- function(df) {
  df %>%
    mutate(
      county = county %>%
        str_trim() %>%
        str_remove(",\\s*Oregon$") %>%
        str_remove("\\s*County$") %>%
        str_squish() %>%
        str_to_title()
    )
}

# Load the external dataset and normalise its county key. Previously
# `normalize_county()` was defined but never called, so the raw `county`
# values were joined as-is.
ext <- normalize_county(read_csv("Kim_2073_External_Dataset_05012023.csv"))

# Pass the interpreter table through the SAME normaliser, so both sides of the
# join agree on letter case as well as on suffix and whitespace handling.
interps_keyed <- normalize_county(interps)

# A left join keeps unmatched rows with NA in the external columns; report
# them instead of letting them vanish silently in later filters.
unmatched_counties <- setdiff(interps_keyed$county, ext$county)
if (length(unmatched_counties) > 0) {
  warning(
    "Counties with no match in the external dataset: ",
    paste(unmatched_counties, collapse = ", "),
    call. = FALSE
  )
}

# NOTE(review): the join is on `county` only, which assumes the external file
# has one row per county -- confirm against the data.
panel_merged2 <- interps_keyed %>%
  left_join(ext, by = "county")

# 2. Drawing the line graph

library(dplyr)
library(ggplot2)
library(readr)

# 2.1. Total interpreters

# Cohort labels in the order used for the factor levels and the legend.
cohort_levels <- c("2010 adopters","2011 adopters","2014 adopters","2018 adopters","Non-adopters")

# Mean of `total_interpreters` per year and cohort, 2010-2020.
# Cohorts are derived from `yeartrt_ext`: 2010/2011/2014/2018 map to the
# matching adopter group and 0 maps to "Non-adopters". Any other value
# (including NA) yields an NA cohort and is removed by the filter.
df_plot <- panel_merged2 %>%
  mutate(
    year = as.integer(year),
    yeartrt_ext = as.integer(yeartrt_ext),
    cohort = dplyr::case_when(
      yeartrt_ext == 2010 ~ "2010 adopters",
      yeartrt_ext == 2011 ~ "2011 adopters",
      yeartrt_ext == 2014 ~ "2014 adopters",
      yeartrt_ext == 2018 ~ "2018 adopters",
      yeartrt_ext == 0    ~ "Non-adopters",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(cohort), year >= 2010, year <= 2020) %>%
  group_by(year, cohort) %>%
  summarise(avg_total_interpreters = mean(total_interpreters, na.rm = TRUE), .groups = "drop") %>%
  mutate(cohort = factor(cohort, levels = cohort_levels))

# Years marked with dotted vertical reference lines (also used in section 2.2).
adopt_years <- c(2010, 2011, 2014, 2018)

# Build the figure as an object so it can be saved explicitly below.
# Cohort is mapped to colour, line type and point shape at once, so the three
# legends merge into a single one.
p_total_interpreters <- ggplot(
  df_plot,
  aes(x = year, y = avg_total_interpreters,
      color = cohort, linetype = cohort, shape = cohort)
) +
  geom_vline(xintercept = adopt_years, linetype = "dotted",
             color = "black", linewidth = 0.6, alpha = 0.8) +
  # A single line layer; the original added geom_line() twice by mistake.
  geom_line(linewidth = 1.2) +
  geom_point(size = 5) +
  scale_x_continuous(breaks = 2010:2020, limits = c(2010, 2020)) +
  scale_color_manual(values = c(
    "2010 adopters" = "#a1d99b",
    "2011 adopters" = "#74c476",
    "2014 adopters" = "#31a354",
    "2018 adopters" = "#006d2c",
    "Non-adopters"  = "#d73027"
  ), breaks = cohort_levels, drop = FALSE) +
  scale_linetype_manual(values = c(
    "2010 adopters" = "solid",
    "2011 adopters" = "solid",
    "2014 adopters" = "solid",
    "2018 adopters" = "solid",
    "Non-adopters"  = "dashed"
  ), breaks = cohort_levels, drop = FALSE) +
  scale_shape_manual(values = c(
    "2010 adopters" = 16,  # filled circle
    "2011 adopters" = 17,  # filled triangle
    "2014 adopters" = 15,  # filled square
    "2018 adopters" = 18,  # filled diamond
    "Non-adopters"  = 8    # star
  ), breaks = cohort_levels, drop = FALSE) +
  labs(x = "Year", y = "Total number of interpreters", color = "", linetype = "", shape = "") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

# Show the figure, as the auto-printed top-level expression used to.
print(p_total_interpreters)

# Pass the plot explicitly: relying on last_plot() fails when the script is
# run via source(), where top-level plots are not auto-printed.
ggsave("total_interpreters_by_cohort.png", plot = p_total_interpreters,
       width = 8, height = 5, dpi = 600)

# 2.2. Spanish interpreters

# Cohort labels in the order used for the factor levels and the legend.
cohort_levels <- c("2010 adopters","2011 adopters","2014 adopters","2018 adopters","Non-adopters")

# Mean of `spanish_interpreters` per year and cohort, 2010-2020.
# Cohorts are derived from `yeartrt_ext`: 2010/2011/2014/2018 map to the
# matching adopter group and 0 maps to "Non-adopters". Any other value
# (including NA) yields an NA cohort and is removed by the filter.
df_plot_spanish <- panel_merged2 %>%
  mutate(
    year = as.integer(year),
    yeartrt_ext = as.integer(yeartrt_ext),
    cohort = dplyr::case_when(
      yeartrt_ext == 2010 ~ "2010 adopters",
      yeartrt_ext == 2011 ~ "2011 adopters",
      yeartrt_ext == 2014 ~ "2014 adopters",
      yeartrt_ext == 2018 ~ "2018 adopters",
      yeartrt_ext == 0    ~ "Non-adopters",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(cohort), year >= 2010, year <= 2020) %>%
  group_by(year, cohort) %>%
  summarise(avg_spanish_interpreters = mean(spanish_interpreters, na.rm = TRUE), .groups = "drop") %>%
  mutate(cohort = factor(cohort, levels = cohort_levels))

# Build the figure as an object so it can be saved explicitly below.
# `adopt_years` (the dotted reference lines) is defined in section 2.1.
p_spanish_interpreters <- ggplot(
  df_plot_spanish,
  aes(x = year, y = avg_spanish_interpreters,
      color = cohort, linetype = cohort, shape = cohort)
) +
  geom_vline(xintercept = adopt_years, linetype = "dotted",
             color = "black", linewidth = 0.6, alpha = 0.8) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 5) +
  scale_x_continuous(breaks = 2010:2020, limits = c(2010, 2020)) +
  scale_color_manual(values = c(
    "2010 adopters" = "#a1d99b",  # light green
    "2011 adopters" = "#74c476",  # a little darker
    "2014 adopters" = "#31a354",  # far darker
    "2018 adopters" = "#006d2c",  # darkest green
    "Non-adopters"  = "#d73027"   # red
  ), breaks = cohort_levels, drop = FALSE) +
  scale_linetype_manual(values = c(
    "2010 adopters" = "solid",
    "2011 adopters" = "solid",
    "2014 adopters" = "solid",
    "2018 adopters" = "solid",
    "Non-adopters"  = "dashed"
  ), breaks = cohort_levels, drop = FALSE) +
  scale_shape_manual(values = c(
    "2010 adopters" = 16,  # filled circle
    "2011 adopters" = 17,  # filled triangle
    "2014 adopters" = 15,  # filled square
    "2018 adopters" = 18,  # filled diamond
    "Non-adopters"  = 8    # star
  ), breaks = cohort_levels, drop = FALSE) +
  # No trailing comma after the last argument: an empty argument is only
  # tolerated by ggplot2 versions whose labs() ignores empty dots.
  labs(
    x = "Year",
    y = "Number of Spanish-speaking interpreters",
    color = "",
    linetype = "",
    shape = ""
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

# Show the figure, as the auto-printed top-level expression used to.
print(p_spanish_interpreters)

# Pass the plot explicitly: relying on last_plot() fails when the script is
# run via source(), where top-level plots are not auto-printed.
ggsave("spanish_interpreters_by_cohort.png", plot = p_spanish_interpreters,
       width = 8, height = 5, dpi = 600)